## ============================================================================
## 04_si_s1_prevalence_expert.R
## ----------------------------------------------------------------------------
## Purpose:   Analyse prevalence of land acknowledgments in corporate annual
##            reports and summarise the expert survey results.
##            Corresponds to SI Sections S1.5.1 (corporate reports),
##            S1.5.2 (universities and museums), and S1.7 (expert survey).
##
## Inputs:    combined_data.rds (via 00_setup.R),
##            asx_results_2019to2024.rds, expert_survey.rds
## Outputs:   Tables/table_s11.tex,
##            Tables/table_s12.tex,
##            Tables/table_s13.tex,
##            Tables/table_s14.tex,
##            Tables/table_s15.tex,
##            Tables/table_s16.tex,
##            Figures/fig_s7.pdf,
##            Figures/fig_s8.pdf,
##            Figures/fig_s9.pdf
## ============================================================================

source("00_setup.R")

#### ============== Section S1.5.1: Corporate Annual Reports ============== ####
## ---- Table S11: ASX land acknowledgment counts by year ----
gpt_asx <-
  read_rds("asx_results_2019to2024.rds")

# Ensure text is properly encoded
gpt_asx <-
  gpt_asx %>%
  mutate(acknowledgment_text = iconv(
    acknowledgment_text,
    from = "latin1",
    to = "UTF-8",
    sub = ""
  ))

gpt_asx %>%
  group_by(year) %>%
  summarise(
    count = as.integer(sum(land_acknowledgment)),
    pct   = paste0(count, "\\%")
  ) %>%
  mutate(year = as.character(year)) %>%
  select(year, count, pct) %>%
  xtable() %>%
  print(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE,
        type = "latex", sanitize.text.function = identity, comment = FALSE,
        hline.after = NULL, file = paste0(tab_dir, "table_s11.tex"))

## ---- Table S12: Text analysis of ASX land acknowledgments ----
# Remove potential problematic characters
gpt_asx <-
  gpt_asx %>%
  mutate(
    acknowledgment_text = str_replace_all(acknowledgment_text, "[^[:graph:][:space:]]", "")
  )

# Text complexity measures
gpt_asx <-
  gpt_asx %>%
  mutate(
    # Ensure missing values are handled before processing
    text_clean = ifelse(is.na(acknowledgment_text), "", acknowledgment_text) %>%  # Replace NA with ""
      str_to_lower() %>%
      str_remove_all("[[:punct:]]") %>%
      str_squish(),

    word_count = ifelse(text_clean == "", NA_real_, str_count(text_clean, "\\w+")),
    char_count = ifelse(text_clean == "", NA_real_, nchar(text_clean)),

    lexical_diversity = map_dbl(text_clean, ~{
      if (.x == "" | is.na(.x)) {
        return(NA_real_)  # Assign NA if text is missing or empty
      }

      tokens_obj <- tokens(.x, remove_punct = TRUE)
      if (length(unlist(tokens_obj)) == 0) {
        return(NA_real_)  # Handle cases where text is all stop words
      }

      lexdiv_score <- textstat_lexdiv(dfm(tokens_obj), measure = "TTR")
      lexdiv_score$TTR
    }),

    readability = map_dbl(text_clean, ~{
      if (.x == "" | is.na(.x)) {
        return(NA_real_)  # Avoid errors when text is missing
      }

      readability_score <- textstat_readability(.x, measure = "Flesch.Kincaid")
      readability_score$Flesch.Kincaid
    })
  )

gpt_asx %>%
  group_by(year) %>%
  summarise(
    `\\textit{Word count}` = mean(word_count, na.rm = TRUE),
    `\\textit{Character count}` = mean(char_count, na.rm = TRUE),
    `\\textit{Lexical diversity}` = mean(lexical_diversity, na.rm = TRUE),
    `\\textit{Readability}` = mean(readability, na.rm = TRUE)
  ) %>%
  mutate(year = paste(year)) %>%
  xtable() %>%
  print(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE,
        type = "latex", sanitize.text.function = identity, comment = FALSE,
        hline.after = NULL, file = paste0(tab_dir, "table_s12.tex"))






#### ============== Section S1.5.2: Universities and Museums ============== ####

uni_dat <-
  read_rds("unis_and_museums.rds") %>%
  mutate(
    org_type = str_remove(org_type, " University"),
    org_name = str_replace_all(org_name, "&", "\\\\&"),
    region = case_when(
      region == "Mid-Atlantic"                          ~ "Northeast",
      region == "Mountain West"                         ~ "West",
      region == "Pacific NW"                            ~ "West",
      region == "South Central"                         ~ "South",
      region == "Southwest" & str_detect(org_name, "New Mexico|Arizona") ~ "West",
      region == "Southwest"                             ~ "South",
      TRUE                                              ~ region
    )
  ) %>%
  select(org_name, org_type, country, region, year_adopted) %>%
  arrange(org_name)

## ---- Table S13: Australian universities ----
uni_dat %>%
  filter(country == "Australia", org_type %in% c("Public", "Private")) %>%
  select(-country) %>%
  xtable() %>%
  print(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE,
        type = "latex", sanitize.text.function = identity, comment = FALSE,
        hline.after = NULL, file = paste0(tab_dir, "table_s13.tex"))

## ---- Table S14: U.S. universities ----
uni_dat %>%
  filter(country == "United States", org_type %in% c("Public", "Private")) %>%
  select(-country) %>%
  xtable() %>%
  print(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE,
        type = "latex", sanitize.text.function = identity, comment = FALSE,
        hline.after = NULL, file = paste0(tab_dir, "table_s14.tex"))

## ---- Table S15: Australian galleries and museums ----
uni_dat %>%
  filter(country == "Australia", org_type %in% c("Museum", "Gallery")) %>%
  select(-country) %>%
  xtable() %>%
  print(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE,
        type = "latex", sanitize.text.function = identity, comment = FALSE,
        hline.after = NULL, file = paste0(tab_dir, "table_s15.tex"))

## ---- Table S16: U.S. galleries and museums ----
uni_dat %>%
  filter(country == "United States", org_type %in% c("Museum", "Gallery")) %>%
  select(-country) %>%
  xtable() %>%
  print(include.rownames = FALSE, include.colnames = FALSE, only.contents = TRUE,
        type = "latex", sanitize.text.function = identity, comment = FALSE,
        hline.after = NULL, file = paste0(tab_dir, "table_s16.tex"))





#### ============== Section S1.7: Expert Survey ============== ####

expert_dat <-
  read_rds(
    "expert_survey.rds",
  ) %>%
  dplyr::mutate(dplyr::across(
    c(guess_voice:guess_reparations),
    ~ forcats::fct(., levels = c("Decrease", "No effect", "Increase"))
  ))


## ---- In-text: Observed vs. expert-predicted belief accuracy (Section S1.7) ----
obs_accuracy <-
  combined_dat %>%
  filter(Z_land_info == 0) %>%
  group_by(sample) %>%
  summarise(ybar = mean(y_recall_binary, na.rm = TRUE)) %>%
  mutate(across(where(is.numeric), \(x) round(x, 2)))

obs_accuracy

guess_accuracy <-
  round(mean(expert_dat$guess_belief), 2)

guess_accuracy

## ---- In-text: Expert prediction proportions by outcome (Section S1.7) ----
## Information seeking
table(expert_dat$guess_infoseek) /
  sum(table(expert_dat$guess_infoseek))

## Collective guilt
table(expert_dat$guess_guilt) /
  sum(table(expert_dat$guess_guilt))

## Support for voice
table(expert_dat$guess_voice) /
  sum(table(expert_dat$guess_voice))

## Reparations
table(expert_dat$guess_reparations) /
  sum(table(expert_dat$guess_reparations))

## Corporate social responsibility (vignette)
table(expert_dat$guess_vignette) /
  sum(table(expert_dat$guess_vignette))

## ---- Figure S7: Expert predicted belief accuracy ----
p <-
  ggplot(expert_dat, aes(x = guess_belief)) +
  geom_vline(xintercept = 70,
             col = "red",
             linetype = 2) +
  annotate("text", x = 82, y = Inf, vjust = 1.5,
           label = "Observed belief\naccuracy: 70%", size = 3.5,
           col = "red") +
  geom_dotplot(method = "dotdensity",
               fill = "darkgrey",
               color = "black",
               stackdir = "up",
               dotsize = .9
  ) +
  scale_x_continuous(label = scales::label_percent(scale = 1),
                     expand = c(0.01, 0.01),
                     limits = c(0, 100),
                     breaks = seq(0, 100, 10)) +
  scale_y_continuous(expand = c(0.02, 0), breaks = NULL) +
  labs(x = NULL, y = NULL) +
  theme_tufte() +
  theme(panel.grid.minor.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.ticks.x = element_line(linewidth = 0.25),
        axis.line.x = element_line(linewidth = 0.25),
        axis.title.x = element_text(size = 10),
        axis.text.x = element_text(size = 11),
        axis.text.y = element_blank(),
        plot.margin=unit(c(t = .5, r = 1, b = .5, l = .8), "lines"))

ggsave(
  filename = paste0(fig_dir, "fig_s7.pdf"),
  p,
  width = 6,
  height = 3
)

## ---- Figure S8: Expert predictions for Study 1 outcomes ----
plot_dat <-
  expert_dat %>%
  select(guess_voice:guess_reparations) %>%
  pivot_longer(
    cols = guess_voice:guess_reparations,
    names_to = "outcome",
    values_to = "y"
  ) %>%
  mutate(outcome = factor(
    outcome,
    levels = c("guess_infoseek", "guess_guilt", "guess_reparations", "guess_voice"),
    labels = c("Information\nSeeking", "Collective\nGuilt", "Support for\nReparations", "Voice\nReferendum")
  )) %>%
  group_by(outcome, y) %>%
  mutate(dot_pos = row_number()) %>%
  ungroup()

pct_labs <-
  plot_dat %>%
  count(outcome, y) %>%
  group_by(outcome) %>%
  mutate(pct_lab = paste0(round(n / sum(n) * 100), "%"))

p <-
  ggplot(plot_dat, aes(x = y, y = dot_pos)) +
  geom_point(size = 5, shape = 21, fill = "darkgrey", color = "black") +
  geom_text(data = pct_labs,
            aes(x = y, y = n + 1.75, label = pct_lab),
            size = 4, color = "black", inherit.aes = FALSE) +
  #facet_wrap( ~ outcome, scales = "free_x") +
  facet_row(~ outcome) + 
  scale_y_continuous(expand = expansion(add = c(0.5, 2)), breaks = NULL) +
  labs(x = NULL, y = NULL) +
  theme_tufte() +
  theme(axis.ticks.x  = element_blank(),
        axis.ticks.y  = element_blank(),
        axis.line.x   = element_line(linewidth = 0.25),
        axis.text.x   = element_text(size = 10, vjust = -0.5),
        axis.text.y   = element_blank(),
        plot.margin   = unit(c(t = .5, r = 1, b = .5, l = .8), "lines"),
        strip.text    = element_text(face = "bold", size = 11))

p


ggsave(
  filename = paste0(fig_dir, "fig_s8.pdf"),
  p,
  width = 8,
  height = 4.5
)


## ---- Figure S9: Expert predictions for Study 2 outcome ----
plot_dat <-
  expert_dat %>%
  select(guess_vignette) %>%
  na.omit() %>%
  mutate(
    y       = factor(guess_vignette, levels = c("Decrease", "No effect", "Increase")),
    outcome = "Corporate Social Responsibility"
  ) %>%
  group_by(outcome, y) %>%
  mutate(dot_pos = row_number()) %>%
  ungroup()

pct_labs <-
  plot_dat %>%
  count(outcome, y) %>%
  group_by(outcome) %>%
  mutate(pct_lab = paste0(round(n / sum(n) * 100), "%"))

p <-
  ggplot(plot_dat, aes(x = y, y = dot_pos)) +
  geom_point(size = 5, shape = 21, fill = "darkgrey", color = "black") +
  geom_text(data = pct_labs,
            aes(x = y, y = n + 1.75, label = pct_lab),
            size = 4, color = "black", inherit.aes = FALSE) +
  #facet_wrap( ~ outcome, scales = "free_x") +
  facet_row(~ outcome) + 
  scale_y_continuous(expand = expansion(add = c(0.5, 2)), breaks = NULL) +
  labs(x = NULL, y = NULL) +
  theme_tufte() +
  theme(axis.ticks.x  = element_blank(),
        axis.ticks.y  = element_blank(),
        axis.line.x   = element_line(linewidth = 0.25),
        axis.text.x   = element_text(size = 10, vjust = -0.5),
        axis.text.y   = element_blank(),
        plot.margin   = unit(c(t = .5, r = 1, b = .5, l = .8), "lines"),
        strip.text    = element_text(face = "bold", size = 11))

p

ggsave(
  filename = paste0(fig_dir, "fig_s9.pdf"),
  p,
  width = 4,
  height = 2.5
)


## ---- Session Info -----------------------------------------------------------
sessionInfo()
